// Session settings: never stop output at a --more-- prompt, and make any
// pause commands encountered later no-ops.
set more off
pause off

/**********************************************************************
* Cheaper, Faster, and More Than Good Enough:
Is GPS the New Gold Standard in Land Area Measurement? -- SRM

** File prepares data from 2013 Ethiopia Land and Soil Experiment for analysis.
** Data files available at: http://go.worldbank.org/03D10960G0
							or http://microdata.worldbank.org/index.php/catalog/2671

Note: All variables that could potentially be used to identify survey 
respondents have been withheld from public data release. Syntax that 
references these variables may also have been hidden in order to protect 
respondent confidentiality.							
***********************************************************************/

// Directory and file globals used by the rest of this file.
// et_data is intentionally blank in this release (placeholder comment below):
// set it to the root folder that holds the downloaded data before running.
global et_data		/*XXX*/
global phdata 		"$et_data/PH"
global ppdata 		"$et_data/PP"
global cc			"$et_data/CC"
global shapemetrics	"$et_data/SupplementaryData/ShapeMetrics_LASER.dta"
// conversion is also defined empty here (only a comment follows the name).
// It must be set to the path of the conversion-factor file described below,
// because it is later used as the using-file of a merge on region, zone,
// woreda and self-reported unit.
global conversion	// conversion factors for self-reported land area units - 
					// found as part of the public data release for the 2011/12 
					// Ethiopia Rural Socioeconomic Survey 
					// (http://microdata.worldbank.org/index.php/catalog/2053/datafile/F1)

**********************
**Observation Counts**
**********************

// Descriptive tallies only: nothing created here is used later in the file.
// The number after each command is the count recorded by the original authors.
// Paths use forward slashes (as the globals do) so they resolve on every OS.

// Clusters and households on the cover page
use "$ppdata/COVER_PAGE.dta", clear
codebook cluster //85
codebook hhid // 1018

// 1st measured field per household
use "$ppdata/PP9_1_IN_FIELD_MEASUREMENT.dta", clear
count // 1018 field measured in 1st section
count if pp91_29!=. // 178 fields laid with 4x4 for crop-cutting
count if pp91_35==. & pp91_36==. // 4 fields without soils data (1 of which had CC)

// 2nd measured field per household (pp92_16 missing = no GPS measurement)
use "$ppdata/PP9_2_IN_FIELD_MEASUREMENT.dta", clear
count // 1018
count if pp92_16!=. // 787 measured
count if pp92_29!=. & pp92_16!=. // 27 fields laid with 4x4 for crop-cutting
count if pp92_35==. & pp92_36==. & pp92_16!=. // 2 fields without soils data (1 of which had CC)


*******************
**Method Duration**
*******************

//Build one field-level measurement file per field slot.
//The 1st field (file PP9_1, variables pp91_...) and the 2nd field (file PP9_2,
//variables pp92_...) are processed by the same code; the results are saved in
//tempfiles field1 and field2.
forvalues f = 1/2 {
	local p pp9`f'

	use "$ppdata/PP9_`f'_IN_FIELD_MEASUREMENT.dta", clear
	drop if `p'_16==. // no GPS measurement (1 obs in 1st-field file, 231 in 2nd)

	//Duration in hours of each activity.
	//Each entry is: method name, start-time item number, finish-time item number.
	//For every item the suffix a holds the hour and the suffix b the minute.
	foreach spec in "CR 3 11" "GPS 12 21" "SOIL 34 40" {
		tokenize `spec'
		local m `1'
		local s `p'_`2'
		local e `p'_`3'

		//A finish hour of 1 after a start hour of 11 or 12 is recoded to 13 so that
		//finish minus start is positive (hours appear to be on a 12-hour clock)
		replace `e'a=13 if `e'a==1 & inlist(`s'a,11,12)

		//Intermediate values are stored one step at a time, as in the original code,
		//so that rounding to storage precision is unchanged
		tempvar smin sclock fmin fclock
		gen `smin'=`s'b/60
		gen `sclock'=`s'a + `smin'
		gen `fmin'=`e'b/60
		gen `fclock'=`e'a + `fmin'

		gen `m'time=`fclock'-`sclock'
		if "`m'"!="SOIL" {
			label var `m'time "`m' time (hours)"
		}

		//Negative durations are set to missing (cleaning solution not clear).
		//Recorded counts: CR 10 / GPS 11 / soil 1 (1st field); CR 2 / GPS 7 / soil 7 (2nd field)
		count if `m'time<0
		replace `m'time=. if `m'time<0
	}

	ren `p'_5 		CRarea
	ren `p'_16 	GPSarea
	ren `p'_25a	ALTarea

	ren `p'_6  closingerr
	ren `p'_19 treecover
	ren `p'_20 weather
	ren `p'_26 slope_clin
	ren `p'_14 num_satellite
	ren `p'_13 gps_accuracy
	ren `p'_15 elevation
	ren `p'_17 gps_perimeter
	ren `p'_4	cr_perimeter
	ren `p'_35 soilcolor_enum
	ren `p'_36	rockcover
	ren `p'_38a	soiltext_enum
	ren `p'_25b	alt_perimeter
	ren `p'_23		alt_acqseconds

	//Variable names are written out in full (no abbreviations) so the list cannot
	//become ambiguous; the temporary clock variables are dropped here
	keep hhid parcel_id field_id *area *time alt_acqseconds closingerr treecover ///
		alt_perimeter gps_perimeter cr_perimeter soilcolor_enum rockcover ///
		soiltext_enum weather slope_clin gps_accuracy elevation num_satellite

	tempfile field`f'
	save "`field`f''"
}

//Stack the 1st-field and 2nd-field files into a single field-level dataset
use `field1', clear
append using `field2'
drop if parcel_id==.
count // 1804 fields measured

//Areas and durations recorded as exactly 0 are treated as missing
mvdecode CRarea GPSarea ALTarea CRtime GPStime SOILtime, mv(0)


************
**CLEAN UP**
************

	//Elevations out of range: one hand correction per field, each applied only
	//when the recorded value is still the one listed
	replace elevation=1731 if hhid==805  & parcel_id==1 & field_id==2 & elevation==17   // cluster mean
	replace elevation=2668 if hhid==4005 & parcel_id==1 & field_id==6 & elevation==268  // based on other field
	replace elevation=2667 if hhid==4205 & parcel_id==2 & field_id==1 & elevation==267  // based on other field
	replace elevation=1987 if hhid==1803 & parcel_id==1 & field_id==2 & elevation==7987 // based on other field
	replace elevation=1465 if hhid==7005 & parcel_id==1 & field_id==5 & elevation==14   // cluster mean

	//GPS perimeter: two recorded values (3 and 2) are set to missing
	replace gps_perimeter=. if hhid==7110 & parcel_id==1 & field_id==6 & gps_perimeter==3
	replace gps_perimeter=. if hhid==1712 & parcel_id==2 & field_id==1 & gps_perimeter==2

	//CR area
	*Outlier re-calculated from compass bearings and side measurements
	replace CRarea=991.2 if hhid==1609 & parcel_id==2 & field_id==1

	//A GPS accuracy of 0 is treated as missing
	replace gps_accuracy=. if gps_accuracy==0

*******************************************
**Convert objective measurements to acres**
*******************************************
//Multiplier applied to each measured area to express it in acres
//(0.0002471... is the number of acres in one square meter)
local sqm_to_acres 0.0002471053814672
foreach method in CR GPS ALT {
	gen `method'_acres=`method'area*`sqm_to_acres'
}

//Objective measurements are set aside to be merged with self-reported areas
tempfile obj
save `obj'

*****************
**Merge SR Area**
*****************

//Self-reported (SR) field area: convert quantity and unit to acres, then combine
//with the objective measurements saved in tempfile obj.
use "$ppdata/PP5_FIELD_ROSTER.dta", clear
		gen SRunit=pp5_3b

		//Location identifiers are needed to look up location-specific conversion factors
		merge m:1 hhid using "$ppdata/COVER_PAGE.dta", keepus(region zone woreda kebele ea cluster pp_cover_11 tablet_model)
		keep if _m==3
		drop _m

		//Conversion factors (square meters per unit) by region, zone, woreda and unit
		merge m:1 region zone woreda SRunit using "$conversion"
			drop if _m==2 // unused combos
			drop _m
			//Fixed factors for units 2 and 1 (labelled Sq. Meter and Hectare later in this file)
			replace conversion_sqm=1 if SRunit==2
			replace conversion_sqm=10000 if SRunit==1
			gen conv_woreda=1 if conversion_sqm!=.

			**Replace missing wereda-specific conversions with zonal, regional or national averages
			bysort SRunit zone: 	egen zoneconversion=mean(conversion_sqm)
			bysort SRunit region: 	egen regionconversion=mean(conversion_sqm)
			*Dummy for conversion level used
			gen conv_zone=1 if zoneconversion!=. & conversion_sqm==.
			replace conv_zone=1 if SRunit==4 & region==3 & (zone==1 | zone==2 | zone==6 | zone==7)
			gen conv_region=1 if regionconversion!=. & zoneconversion==. & conversion_sqm==.
			foreach x in conv_woreda conv_zone conv_region {
			replace `x'=0 if `x'==. 
			}
			replace conversion_sqm=zoneconversion if conversion_sqm==.
			replace conversion_sqm=regionconversion if conversion_sqm==.

			*Conversions 
			replace pp5_3a=. if pp5_3a==0
			gen SRmeter=pp5_3a*conversion_sqm
			//NOTE(review): this factor is rounded (0.0002471) while the objective areas
			//use 0.0002471053814672; left unchanged to preserve published results - confirm
			gen SRacres=SRmeter*0.0002471

			//The merge keys (hhid parcel_id field_id) must be kept: they were missing
			//from this list, which left the using file without the variables that the
			//1:1 merge below matches on
			keep hhid parcel_id field_id zone pp5_3a pp5_3b pp5_3b_os pp5_4 pp5_5 pp5_5_os SRacres
			tempfile SRdata
			save "`SRdata'"

use "`obj'", clear
merge 1:1 hhid parcel_id field_id using "`SRdata'", keepus(zone pp5_3a pp5_3b pp5_3b_os pp5_4 pp5_5 pp5_5_os SRacres)
keep if _m==3 | (_m==2 & pp5_5==1) // keeping cultivated fields only
drop _m pp5_5_os pp5_5
	ren SRacres SR_acres
	ren pp5_3a SR_quant
	ren pp5_3b SR_unit
	ren pp5_3b_os SR_unit_os
	ren pp5_4 SR_unitsperha

	count // 4160 cultivated fields
	tempfile areas
	save "`areas'"
	
****************
**Plot Corners**
****************	
	
	//Number of corners per field, taken from the compass-and-rope (CR) files:
	//PP9_1_CR holds the 1st field (variable pp91_7a), PP9_2_CR the 2nd (pp92_7a).
	forvalues f = 1/2 {
		use "$ppdata/PP9_`f'_CR.dta", clear

		//Corners = number of non-missing pp9?_7a entries recorded for the field
		bysort hhid parcel_id field_id: egen num_corners=count(pp9`f'_7a)
		drop if num_corners==1 // 3 obs (1st field), 4 obs (2nd field)

		//One record per field
		collapse (max) num_corners, by(hhid parcel_id field_id)
		tempfile sides`f'
		save "`sides`f''"
	}

	use "`sides1'", clear
	append using "`sides2'"
	label var num_corners "number of corners in CR measurement"

	tempfile sides
	save "`sides'"
	
**************************
**Parcel Characteristics**
**************************

	//Parcel-level indicators from the parcel roster (one record per hhid-parcel_id,
	//as required by the later m:1 merge on those keys)
	use "$ppdata/PP4_PARCEL_ROSTER.dta", clear

		//Each indicator is 1 when the roster item has the listed code and 0 otherwise
		//(a missing item therefore gives 0)
		gen rented=(pp4_3==3)
		label var rented "parcel is rented"
		
		gen title=(pp4_6==1)
		label var title "HH has title, certificate or other document for parcel"

		gen collateral=(pp4_4==1)
		label var collateral "HH can sell or use parcel for collateral"
		
		ren pp4_9a dist_home
	
		keep hhid parcel_id dist_home rented title collateral
		
	tempfile parcel
	save "`parcel'"

*************************
**Field Characteristics**
*************************
	
	//Field roster: management and irrigation for cultivated fields
	use "$ppdata/PP5_FIELD_ROSTER.dta", clear	
	keep if pp5_5==1 // cultivated fields only
	
		//Number of roster records (cultivated fields) per household
		bysort hhid: egen num_cult_fields=count(hhid)
		label var num_cult_fields "number of fields cultivated by HH"
		
		ren pp5_6 manager_id
		
		//1 when the manager ID equals pp5_2, 0 otherwise
		//(two missing IDs compare as equal and therefore give 1)
		gen manager_respond=(manager_id==pp5_2)
		label var manager_respond "field manager provided SR area"
		
		gen irrigated=(pp5_17==1)
		label var irrigated "field is irrigated"
		
		keep hhid parcel_id field_id manager_id manager_respond irrigated num_cult_fields
		
	tempfile fieldroster
	save "`fieldroster'"
	
	//Field details: self-reported slope and soil characteristics, fertilizer use
	use "$ppdata/PP6_FIELD_DETAILS.dta", clear
			
				//Clean Up - 1 HH with objective measurements missing SR soil Qs, use HH average (all fields the same)
					replace pp6_3=1 if pp6_3==. & hhid==5004 & parcel_id==2 & field_id==1
					replace pp6_5=2 if pp6_5==. & hhid==5004 & parcel_id==2 & field_id==1
					replace pp6_6=1 if pp6_6==. & hhid==5004 & parcel_id==2 & field_id==1
					replace pp6_7=1 if pp6_7==. & hhid==5004 & parcel_id==2 & field_id==1
					replace pp6_9=4 if pp6_9==. & hhid==5004 & parcel_id==2 & field_id==1

		ren pp6_3 slope_sr
		ren pp6_5 soiltype_sr
		ren pp6_7 soilqual_sr
		ren pp6_9 soiltext_sr
		
		gen fertilizer=(pp6_14==1)
		label var fertilizer "fertilizer is used on field (any type)"

		keep hhid parcel_id field_id slope_sr soiltype_sr soilqual_sr soiltext_sr fertilizer
		
	tempfile fielddetails
	save "`fielddetails'"
	
	//Combine roster and details; only fields present in both are kept
	use "`fieldroster'", clear
	merge 1:1 hhid parcel_id field_id using "`fielddetails'"
	keep if _m==3 // 6 don't match (no data in master)
	drop _m
	
	tempfile field
	save "`field'"
	
***************
**Cash Crops **
***************	
//Field-level cash-crop indicators built from the crop-level file.
//NOTE(review): this is the only PP file read from $et_data rather than $ppdata - confirm the location.
use "$et_data/PP7_CROP_DETAILS.dta", clear	

//Narrow definition: crop codes 71, 72, 73, 76
gen cashcrop_1 = inlist(cropcode,72,71,73,76)
label var cashcrop_1 "coffee, chat, cotton, or sugarcane"

//Broad definition: narrow definition plus crop codes 23, 26, 27
gen cashcrop_2 = (cashcrop_1==1 | inlist(cropcode,26,27,23))
label var cashcrop_2 "cashcrop_1 plus rapeseed, sesame, and lineseed"
	
//A field is flagged when any crop recorded on it is flagged
collapse (max) cashcrop*, by(hhid parcel_id field_id)
tempfile cashcrop
save `cashcrop'
	
**********************
**HH Characteristics**
**********************
		
	//Household-level characteristics from the member roster (result: one record per hhid)
	use "$ppdata/PP1_HH_ROSTER.dta", clear

		//Household size = number of roster records with a member ID
		bysort hhid: egen hhsize=count(individual_id)
		
		//Head characteristics; pp1_5==1 identifies the household head.
		//Indicators are 0 for non-heads, head_age and head_yrsed are missing for non-heads.
		gen head_female=(pp1_5==1 & pp1_2==2)
		
		//Ages coded 98/99 with year 9999 and month 99 of birth are set to missing
		replace pp1_4=. if (pp1_4==99 | pp1_4==98) & pp1_3y==9999 & pp1_3m==99  // 5 obs
		gen head_age=pp1_4 if pp1_5==1
		
		gen head_agoccup=(pp1_16==1 & pp1_5==1)
		
		//Years of education from the education code pp1_14:
		//0 when pp1_13 is 2 or missing, codes 0-13 taken at face value,
		//remaining codes mapped to a number of years below
		gen yrsed=.
			replace yrsed=0 if pp1_13==2 | pp1_13==.
			replace yrsed=pp1_14 if pp1_14<=13
			replace yrsed=9  if pp1_14==21
			replace yrsed=10 if pp1_14==22
			replace yrsed=11 if inlist(pp1_14,23,25,26,28)
			replace yrsed=12 if inlist(pp1_14,24,27,29)
			replace yrsed=13 if inlist(pp1_14,15,30,31)
			replace yrsed=14 if inlist(pp1_14,14,16,17,32)
			replace yrsed=15 if inlist(pp1_14,18,33)
			replace yrsed=16 if inlist(pp1_14,19,34,35)
			replace yrsed=18 if pp1_14==20 // estimated 18 years for post-graduate completion
		
		gen head_yrsed=yrsed if pp1_5==1
		
		bysort hhid: egen max_edu=max(yrsed)
		
		//Parentheses (not square brackets) are used to group the expression
		gen head_literate=((pp1_11==1 | pp1_12==1) & pp1_5==1)
	
		//Taking the maximum within household picks up the head's values, because
		//non-heads carry 0 or missing in every head_ variable
		collapse (max) head_yrsed max_edu head_agoccup head_age head_female hhsize head_literate, by(hhid)
			label var hhsize "number of household members"
			label var head_female "female headed HH"
			label var head_age "HH head age (years)"
			label var head_agoccup "HH head's primary industry is ag."
			label var head_yrsed "HH head yrs of education"
			label var max_edu "highest education in HH"
			label var head_literate "HH head can read or write"

	tempfile head
	save "`head'"
	
***************************
**Manager Characteristics**
***************************	
		
	//Characteristics of each cultivated field's manager, looked up in the member roster
	use "$ppdata/PP5_FIELD_ROSTER.dta", clear	
	keep if pp5_5==1 // cultivated fields only
	drop if pp5_6==. // no manager ID
	
		//The manager ID is matched to the roster's member ID within household
		ren pp5_6 individual_id
		merge m:1 hhid individual_id using "$ppdata/PP1_HH_ROSTER.dta"
		keep if _m==3
		
		gen manager_head=(pp1_5==1)
		label var manager_head "field manager is HH head"
		
		//NOTE(review): unlike head_age in the household section, ages coded 98/99
		//are not set to missing here - confirm this is intended
		gen manager_age=pp1_4
		label var manager_age "field manager age (yrs)"
		
		//Years of education from the education code pp1_14:
		//0 when pp1_13 is 2 or missing, codes 0-13 taken at face value,
		//remaining codes mapped to a number of years below
		gen yrsed=.
			replace yrsed=0 if pp1_13==2 | pp1_13==.
			replace yrsed=pp1_14 if pp1_14<=13
			replace yrsed=9  if pp1_14==21
			replace yrsed=10 if pp1_14==22
			replace yrsed=11 if inlist(pp1_14,23,25,26,28)
			replace yrsed=12 if inlist(pp1_14,24,27,29)
			replace yrsed=13 if inlist(pp1_14,15,30,31)
			replace yrsed=14 if inlist(pp1_14,14,16,17,32)
			replace yrsed=15 if inlist(pp1_14,18,33)
			replace yrsed=16 if inlist(pp1_14,19,34,35)
			replace yrsed=18 if pp1_14==20 // estimated 18 years for post-graduate completion
		
		gen manager_yrsed=yrsed 
		label var manager_yrsed "field manager education (yrs)"
		
		gen manager_literate=(pp1_11==1 | pp1_12==1)
		label var manager_literate "field manager can read or write"
		
		//The merge indicator and all roster items are dropped here
		keep hhid parcel_id field_id manager_head manager_age manager_yrsed manager_literate
	
	tempfile manager
	save "`manager'"
	
***************************
**Cover details and merge**
***************************	
	
	//Household-level file: cover-page identifiers plus head characteristics
	use "$ppdata/COVER_PAGE.dta", clear
	
	//Constant survey and GPS-device identifiers
	label define SURVEY 1 "MLASS Ethiopia" 2 "Zanzibar Experiment" 3 "Nigeria Experiment" 4 "LSMS-ISA Malawi 2010/11"
	gen survey=1
	label val survey SURVEY
	gen GPS_model="Garmin eTrex 30"

	rename pp_cover_11 enum_id
	rename pp_cover_13 sup_id
	
	//Interview month and year are the 2nd and 3rd pieces of pp_date when split on "/";
	//the 1st piece and the original date are discarded
	split pp_date, parse(/) gen(date)
	rename date2 int_month
	rename date3 int_year
	label var int_month "Month of interview"
	label var int_year "Year of interview"
	drop date1 pp_date
	
	//Attach head characteristics; unmatched records from either side are retained
	merge 1:1 hhid using `head', nogenerate
	
	keep region zone woreda kebele ea cluster hhid enum_id sup_id tablet_model ///
		survey GPS_model int_month int_year ///
		head_yrsed max_edu head_agoccup head_age head_female hhsize head_literate

	tempfile household
	save `household'
	
	//Assemble the field-level analysis file, starting from the field characteristics.
	//keep() lists which match results survive each merge; nogenerate suppresses _merge.
	use `field', clear

	//Parcel characteristics: matched records only
	merge m:1 hhid parcel_id using `parcel', keep(match) nogenerate
	
		//Manager characteristics: fields absent from the manager file are dropped
		merge 1:1 hhid parcel_id field_id using `manager', keep(match using) nogenerate
		
		//Area measurements: matched records only
		merge 1:1 hhid parcel_id field_id using `areas', keep(match) nogenerate
		
		//Corner counts: fields without a count are kept
		merge 1:1 hhid parcel_id field_id using `sides', keep(master match) nogenerate
		
		//Cash-crop flags: fields without crop records are kept
		merge 1:1 hhid parcel_id field_id using `cashcrop', keep(master match) nogenerate
		
	//Household characteristics: matched records only
	merge m:1 hhid using `household', keep(match) nogenerate
	
count 	
	
*****KEEP ONLY OBSERVATIONS WITH OBJECTIVE MEASUREMENT*****
	
	//Fields without a GPS area are removed
	drop if GPS_acres==.
	count // 1797
	
	//9 obs missing head_age, take zone mean
	tempvar zone_age
	bysort zone: egen `zone_age'=mean(head_age)
	replace head_age=`zone_age' if head_age==.
	drop `zone_age' manager_id
	
	//Generate unique plot ID: parcel ID followed by field ID, each with a
	//leading zero added when the ID is below 10
	tostring parcel_id, gen(parcel_txt)
	tostring field_id, gen(field_txt)
	gen plotid=cond(parcel_id<10,"0"+parcel_txt,parcel_txt) + cond(field_id<10,"0"+field_txt,field_txt)
	label var plotid "UNIQUE - Parcel ID + Field ID"
	drop field_txt parcel_txt SOILtime

	//Column order of the final file (list reproduced exactly from the original,
	//including the second mention of tablet_model and GPS_model near the end)
	order hhid plotid parcel_id field_id ///
		GPS_acres GPSarea gps_perimeter gps_accuracy num_satellite ///
		CR_acres CRarea cr_perimeter closingerr num_corners ///
		ALT_acres ALTarea alt_perimeter ///
		SR_acres SR_quant SR_unit SR_unit_os SR_unitsperha ///
		CRtime GPStime ///
		soiltype_sr soilqual_sr soiltext_sr soilcolor_enum soiltext_enum rockcover ///
		slope_sr slope_clin elevation treecover weather irrigated fertilizer ///
		dist_home rented title collateral manager_respond tablet_model GPS_model ///
		manager_head manager_age manager_yrsed manager_literate ///
		head_yrsed head_agoccup head_age head_female head_literate ///
		hhsize num_cult_fields max_edu ///
		region zone woreda kebele ea cluster enum_id sup_id ///
		tablet_model GPS_model int_month int_year survey

	
*************************
**Gen LEVELS of CR area**
*************************
	//Size classes of the CR and the GPS area, built the same way for both:
	//level_cr / level_gps use cut-offs defined in square meters (written in acres),
	//level_cr_acres / level_gps_acres use cut-offs defined in acres.
	//A missing area leaves both class variables missing.
	foreach m in cr gps {
		local a = upper("`m'") + "_acres"

		gen level_`m'=.
		replace level_`m'=1 if `a'<0.06177634536679									// 250 sq. meters / 0.025 ha
		replace level_`m'=2 if `a'>=0.06177634536679 & `a'<0.1235526907336			// 500 sq. meters / 0.05 ha
		replace level_`m'=3 if `a'>=0.1235526907336 & `a'<0.3706580722008			// 1500 sq. meters / 0.15 ha
		replace level_`m'=4 if `a'>=0.3706580722008 & `a'<0.6177634536679			// 2500 sq. meters / 0.25 ha
		replace level_`m'=5 if `a'>=0.6177634536679 & `a'<1.235526907336			// 5000 sq. meters / 0.5 hectare
		replace level_`m'=6 if `a'>=1.235526907336 & `a'!=.

		//levels in ACRES
		gen level_`m'_acres=.
		replace level_`m'_acres=1 if `a'<0.05
		replace level_`m'_acres=2 if `a'>=0.05 & `a'<0.15
		replace level_`m'_acres=3 if `a'>=0.15 & `a'<0.35
		replace level_`m'_acres=4 if `a'>=0.35 & `a'<0.75
		replace level_`m'_acres=5 if `a'>=0.75 & `a'<1.25
		replace level_`m'_acres=6 if `a'>=1.25 & `a'!=.
	}

	//Value labels (each label gives the upper bound of its class)
	label define level_acres 1 "<0.05 acres" 2 "<0.15 acres" 3 "<0.35 acres" ///
		4 "<0.75 acres" 5 "<1.25 acres" 6 ">=1.25 acres"
	label define level 1 "<250 sq. meters / 0.025 ha" 2 "<500 sq. meters / 0.05 ha" 3 "<1500 sq. meters / 0.15 ha" ///
		4 "<2500 sq. meters / 0.25 ha" 5 "<5000 sq. meters / 0.5 hectare" 6 ">=5000 sq. meters / 0.5 hectare"
	foreach m in cr gps {
		label val level_`m'_acres level_acres
		label val level_`m' level
	}

**************************
** Gen "bias" variables **
**************************

	//Self-reported area relative to GPS area (acres)
	gen bias_gps = SR_acres-GPS_acres
	label var bias_gps "SR_acres-GPS_acres"		

	//GPS area relative to compass-and-rope (CR) area (acres)
	gen bias_cr = GPS_acres-CR_acres
	label var bias_cr "GPS_acres-CR_acres"		

	//Absolute and percentage versions of the GPS - CR difference.
	//Variables are referred to by their full names: the original numerator was
	//written as abs, which only worked as an abbreviation of abs_bias_cr while no
	//other variable beginning with abs existed
	gen abs_bias_cr=abs(bias_cr)
	gen per_bias_cr=(bias_cr/CR_acres)*100
	gen abs_per_cr=(abs_bias_cr/CR_acres)*100	
		label var abs_bias_cr "absolute val. GPS - CR (acres)"
		label var per_bias_cr "relative bias (bias_cr/CR_acre * 100)"
		label var abs_per_cr "absolute val. relative bias, (|bias_cr|/CR_acre * 100)"
		
***************************************
**Impute missings for select variables*
***************************************
	
	//Number of corners missing for 7 obs (only have 1 side in data)
	//CRtime missing for 16 obs [mostly "0"s or >possible time (start and end switched?)]
	//GPStime missing for 19 obs [mostly "0"s or >possible time (start and end switched?)]

	//Missing durations take the mean duration of fields in the same CR size class
	foreach v in CRtime GPStime {
		tempvar fill
		bysort level_cr: egen `fill'=mean(`v')
		replace `v'=`fill' if `v'==.
		drop `fill'
	}

	//Missing corner counts take the most frequent count in the same CR size class
	//NOTE(review): egen mode() returns missing for a class with tied modes unless a
	//tie-breaking option is given - confirm no class is affected
	tempvar modal
	bysort level_cr: egen `modal'=mode(num_corners)
	replace num_corners=`modal' if num_corners==.
	drop `modal'
	
	//gps_accuracy missing for 1 obs
	//slope_clin missing for 4 obs
	//Both take the mean of the other fields in the same cluster
	foreach v in gps_accuracy slope_clin {
		tempvar fill
		bysort cluster: egen `fill'=mean(`v')
		replace `v'=`fill' if `v'==.
		drop `fill'
	}
	
	//Corner-count classes (missing stays missing)
	gen level_corner=cond(num_corners<=4,1,cond(num_corners<10,2,cond(num_corners!=.,3,.)))
	label define CORNERS 1 "<= 4 sides" 2 "5 - 9 sides" 3 ">= 10 sides"
	label val level_corner CORNERS
	
	//Satellite-count classes (missing stays missing)
	gen level_sat=cond(num_satellite<=15,1,cond(num_satellite<20,2,cond(num_satellite!=.,3,.)))
	label define SATELLITES 1 "<=15 satellites" 2 "16 - 19 satellites" 3 ">= 20 satellites"
	label val level_sat SATELLITES

*************************************
** Gen aggregated SR unit variable **
*************************************

	//Aggregated self-reported unit: unit code 1 becomes 2 (Hectare), code 2 becomes
	//3 (Sq. Meter), any other non-missing code becomes 4; missing stays missing
	label define SR_UNIT_AG 1 "Acre" 2 "Hectare" 3 "Sq. Meter" 4 "Non-Standard Unit"
	codebook SR_unit
	gen SR_unit_ag=cond(SR_unit==1,2,cond(SR_unit==2,3,cond(SR_unit!=.,4,.)))
	label val SR_unit_ag SR_UNIT_AG
	
	//NOTE(review): a missing SR_unit_ag is not below 4, so it yields 0 here - confirm intended
	gen standard_unit=(SR_unit_ag<4)
	label var standard_unit "SR in standard unit"
	
**Time in Minutes**
	foreach method in GPStime CRtime {
		gen `method'_min=`method'*60
		label var `method'_min "Measurement time (minutes)"
	}

	//Weather in two groups: codes 1-2 versus codes 3-6; other values stay missing
	gen weather2=cond(inlist(weather,1,2),1,cond(inlist(weather,3,4,5,6),2,.))
	label define WEATHER2 1 "Clear/Partly Cloudy" 2 "Mostly Cloudy/All Cloudy/Rainy"
	label values weather2 WEATHER2
	label var weather2 "weather collapsed"
	
	//Square and cube of the CR area
	gen CR2=CR_acres^2
	gen CR3=CR_acres^3

	
***********************
**Merge Shape Metrics**
***********************	
	
	//Attach the supplementary shape metrics.
	//NOTE(review): _merge is neither inspected nor dropped, so records found only in
	//the shape-metrics file would stay in the data. The counts recorded in this file
	//(1797 before, 33 dropped, 1765 after) differ by one - confirm whether a
	//using-only record is being added here.
	merge 1:1 hhid parcel_id field_id using "$shapemetrics"
		
*******************************************
**TRIM TOP 1% OF ABSOLUTE VALUE OF % BIAS**
*******************************************	
		
	//Absolute and percentage versions of the SR - GPS difference
	gen abs_bias_gps=abs(bias_gps)
	gen per_bias_gps=(bias_gps/GPS_acres)*100
	gen abs_per_gps=(abs_bias_gps/GPS_acres)*100
	
	//Flag records below the 1st and above the 99th percentile of each absolute
	//percentage bias; missing values are never flagged
	foreach m in cr gps {
		summarize abs_per_`m', detail
		gen flagp1_`m'=1 if abs_per_`m'<r(p1)
		gen flagp99_`m'=1 if abs_per_`m'>r(p99) & abs_per_`m'!=.
	}

	//Only the top tail is removed; the bottom-tail flags are kept as variables
	drop if flagp99_cr==1 | flagp99_gps==1
	// DROPS 33 obs 

count //1765

**drop enumerator name labels
label values enum_id .
	

**save "$et_data/CheaperFaster_Ethiopia.dta", replace
	
